learning resources

proof of least squares solution

https://math.stackexchange.com/questions/131590/derivation-of-the-formula-for-ordinary-least-squares-linear-regression

linear regression using the linear model (lm) function

how to obtain coefficients (intercept and slope for a linear model)

# Load the packages used by the snippets in these notes.
library(UsingR)
library(ggplot2)
# The UsingR data set is called `diamond` (singular); `diamonds` is a
# different data set that ships with ggplot2.
data(diamond)
# Simple linear regression of price on carat.
fit <- lm(price ~ carat, data = diamond)
# Named vector holding the intercept and the slope.
coef(fit)
## (Intercept)       carat 
##   -259.6259   3721.0249 

regression on centred data gives an intercept equal to the fitted y value at the mean value of x

  • the I() notation makes the formula evaluate its contents as ordinary arithmetic, so the variable can be centred in-line within the lm call
# Same regression with carat centred at its mean, done in-line via I().
fit2 <- lm(
  formula = price ~ I(carat - mean(carat)),
  data = diamond
)
# Print the intercept and slope of the centred model.
coefficients(fit2)
##            (Intercept) I(carat - mean(carat)) 
##               500.0833              3721.0249

making predictions with a model

# Carat values at which the fitted model is evaluated.
newx <- c(0.16, 0.27, 0.34)
# `newdata` must name its column after the predictor used in the formula.
predict(
  object = fit,
  newdata = data.frame(carat = newx)
)
##         1         2         3 
##  335.7381  745.0508 1005.5225

plotting regression lines in base plot

# Scatterplot of price against carat with the fitted regression line.
data(diamond)
plot(diamond$carat, diamond$price,
     xlab = "Mass (carats)",
     ylab = "Price (SIN $)",
     bg = "lightblue",
     col = "black", cex = 1.1, pch = 21,
     frame.plot = FALSE)  # full argument name instead of partial match `frame`
# Draw the line from the same model that supplies the fitted points below,
# rather than refitting an identical regression a second time.
abline(fit, lwd = 2)
# Overlay the fitted values (red) at each observed carat value.
points(diamond$carat, predict(fit), pch = 19, col = "red")

plotting regression lines in ggplot

# Build the scatterplot in one chain: axis labels, two overlaid
# semi-transparent point layers, and a linear-model smoother.
g <- ggplot(diamond, aes(x = carat, y = price)) +
  xlab("Mass (carats)") +
  ylab("Price (dollars)") +
  geom_point(size = 6, colour = "black", alpha = 0.2) +
  geom_point(size = 5, colour = "blue", alpha = 0.2) +
  geom_smooth(method = "lm", colour = "black")
# Render the plot.
g

plotting interactive regression plot in plotly

https://plot.ly/ggplot2/geom_abline/

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:Hmisc':
## 
##     subplot
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Convert the ggplot object built above into an interactive plotly figure.
p <- ggplotly(p = g)
# Display the interactive figure.
p

obtain residuals for a fit

# Residuals of the fit via the extractor function ...
e <- residuals(fit)
# ... or read directly from the component stored on the model object.
fit[["residuals"]]
##           1           2           3           4           5           6 
## -17.9483176  -7.7380691 -22.9483176 -85.1585661 -28.6303057   6.2619309 
##           7           8           9          10          11          12 
##  23.4721795  37.6311854 -38.7893116  24.4721795  51.8414339  40.7389488 
##          13          14          15          16          17          18 
##   0.2619309  13.4209369  -1.2098087  40.5287002  36.1029250 -44.8405542 
##          19          20          21          22          23          24 
##  79.3696943 -25.0508027  57.8414339   9.2619309 -20.9483176  -3.7380691 
##          25          26          27          28          29          30 
## -19.9483176  27.8414339 -54.9483176   8.8414339 -26.9483176  16.4721795 
##          31          32          33          34          35          36 
## -22.9483176 -13.1020453 -12.1020453  -0.5278205   3.2619309   2.2619309 
##          37          38          39          40          41          42 
##  -1.2098087 -43.2098087 -27.9483176 -23.3122938 -15.6303057  43.2672091 
##          43          44          45          46          47          48 
##  32.8414339   7.3696943   4.3696943 -11.5278205 -14.8405542  17.4721795
#